http://dds.ec/ (blog, podcast and…)
Authored/contributed to 12 CRAN packages
# Peek at the first three rows of the built-in `pressure` data set
head(pressure, n = 3)
## temperature pressure ## 1 0 0.0002 ## 2 20 0.0012 ## 3 40 0.0060
# Quartiles, min/max, and mean for each column of `pressure`
summary(pressure)
## temperature pressure ## Min. : 0 Min. : 0.0002 ## 1st Qu.: 90 1st Qu.: 0.1800 ## Median :180 Median : 8.8000 ## Mean :180 Mean :124.3367 ## 3rd Qu.:270 3rd Qu.:126.5000 ## Max. :360 Max. :806.0000
# Parameterize a helix and render it as an interactive 3-D scatterplot
# (threejs::scatterplot3js), colouring points along the rainbow palette.
# The original slide had both statements fused onto one line, which is
# not valid R; they must be separate statements.
z <- seq(-10, 10, 0.01)
scatterplot3js(cos(z), sin(z), z, color = rainbow(length(z)))
# Interactive D3 force-directed graph (networkD3) of the Les Miserables
# co-occurrence data; link/node columns are identified by name.
forceNetwork(
  Links  = MisLinks,
  Nodes  = MisNodes,
  Source = "source",
  Target = "target",
  Value  = "value",
  NodeID = "name",
  Group  = "group",
  opacity = 0.4
)
# Plot the ten most frequent flight destinations on an interactive globe
# (threejs::globejs). Columns 3:4 of `flights` hold destination lat/long.

# Key each flight by its rounded destination lat/long pair.
dest <- factor(sprintf("%.2f:%.2f", flights[, 3], flights[, 4]))

# Rank destinations by flight count. head() (rather than [1:10]) avoids
# introducing NA entries when there are fewer than ten distinct
# destinations.
freq <- sort(table(dest), decreasing = TRUE)
frequent_destinations <- head(names(freq), 10)

idx <- dest %in% frequent_destinations
frequent_flights <- flights[idx, ]
latlong <- unique(frequent_flights[, 3:4])

# World texture shipped with threejs; draw yellow great-circle arcs for
# the frequent flights.
earth <- system.file("images/world.jpg", package = "threejs")
globejs(img = earth, lat = latlong[, 1], long = latlong[, 2],
        arcs = frequent_flights, arcsHeight = 0.3, arcsLwd = 2,
        arcsColor = "#ffff00", arcsOpacity = 0.15, atmosphere = TRUE)
https://www.rstudio.com/products/rstudio/download/
It's "vectorized" (think map() or [ for ])
# Vectorized arithmetic: sum() reduces the whole vector at once.
# The slide fused both statements onto one line, which is invalid R.
a <- 1:10
sum(a)
## [1] 55
Data frames are akin to Excel/Google spreadsheets, just without the baggage
It really likes something called "data frames" (Python does too, now)
# First six rows of the built-in iris measurements
head(iris, n = 6L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species ## 1 5.1 3.5 1.4 0.2 setosa ## 2 4.9 3.0 1.4 0.2 setosa ## 3 4.7 3.2 1.3 0.2 setosa ## 4 4.6 3.1 1.5 0.2 setosa ## 5 5.0 3.6 1.4 0.2 setosa ## 6 5.4 3.9 1.7 0.4 setosa
It has an affinity for arcane punctuation:
# "Arcane punctuation" demo. NB: %>% binds tighter than *, so this
# computes Sepal.Length[[2]] * sqrt(3) (= 4.9 * 1.732... = 8.487049),
# NOT sqrt(Sepal.Length[[2]] * 3). The slide fused both statements onto
# one line, which is invalid R; they must be separate statements.
`huh?` <- iris$Sepal.Length[[2]] * 3 %>% sqrt()
print(`huh?`)
## [1] 8.487049
And, complex+efficient algorithms can be confusing:
# Parse free-form "N weeks, N days, N hours" strings into total hours.
# NOTE(review): relies on stringr (str_split/str_trim) and lubridate
# (duration) being attached -- confirm the library() calls upstream.
dat <- readLines(textConnection(" 3 weeks, 2 days, 4 hours
4 week, 6 days, 12 hours
4 day, 3 hours
7 hours
8 hour"))
# Per line: split on commas, convert each "<n> <unit>" piece to a
# duration (in seconds), sum them, then divide by 3600 to report hours.
sapply(str_split(str_trim(dat), ",[ ]*"), function(x) {
sum(sapply(x, function(y) {
bits <- str_split(str_trim(y), "[ ]+")[[1]]
duration(as.numeric(bits[1]), bits[2])
})) / 3600
})
## [1] 556 828 99 7 8
# Scatterplot of car weight vs. mpg with a smoothed trend line.
# The slide fused three statements onto one line, which is invalid R.
library(ggplot2)
g1 <- ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_smooth() +
  geom_point()
print(g1)
This is all it takes to turn that plot into an editable/usable SVG graphic:
# ggsave()'s first argument is the filename, not the plot:
# ggsave(filename, plot = ..., ...). The original call had them
# swapped, which errors at runtime.
ggsave("img/g1.svg", plot = g1)
<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink"
width="819pt" height="425pt"
viewBox="0 0 819 425" version="1.1">
<defs>
<g>
<symbol overflow="visible" id="glyph0-0">
<path style="stroke:none;"
d="M 0.3125 0 L 0.3125 -6.875 L 5.765625 -6.875 L 5.765625 0 Z M 4.90625 -0.859375 L 4.90625 -6.015625 L 1.171875 -6.015625 L 1.171875 -0.859375 Z "/>
</symbol>
<symbol overflow="visible" id="glyph0-1">
<path style="stroke:none;"
d="M 0.921875 -4.75 L 0.921875 -5.390625 C 1.523438 -5.453125 1.945312 -5.550781 2.1875 -5.6875 C 2.425781 -5.832031 2.609375 -6.164062 2.734375 -6.6875 L 3.390625 -6.6875 L 3.390625 0 L 2.5 0 L 2.5 -4.75 Z "/>
# Re-theme the same plot object with one-liners. The slide fused each
# library() call with the following expression, which is invalid R.
library(ggthemes)
g1 + theme_economist()  # The Economist house style
library(xkcd)
g1 + theme_xkcd()       # xkcd hand-drawn style
g1 + theme_excel()      # classic Excel grey (from ggthemes)
Let's find this data!
Let's find this data!
Let's find this data!
Almost done!
Done! (kinda)
Done! (kinda)
# Load the BEA download, skipping its 4-line preamble, then drop the
# two footer rows the file carries at the bottom (head with negative n).
dat <- read.csv("data/download.csv", skip = 4, header = TRUE,
                stringsAsFactors = FALSE)
dat <- head(dat, n = -2)
head(dat)
## GeoFips GeoName ## 1 00999 United States (Nonmetropolitan Portion) ## 2 10180 Abilene, TX (Metropolitan Statistical Area) ## 3 10420 Akron, OH (Metropolitan Statistical Area) ## 4 10500 Albany, GA (Metropolitan Statistical Area) ## 5 10540 Albany, OR (Metropolitan Statistical Area) ## 6 10580 Albany-Schenectady-Troy, NY (Metropolitan Statistical Area) ## X2013 ## 1 87.7 ## 2 91.3 ## 3 88.9 ## 4 84.3 ## 5 93.5 ## 6 99.0
# Strip the "(Metropolitan Statistical Area)" suffix from place names
# (fixed-string match, no regex needed) and zero-pad the FIPS codes back
# to five characters -- read.csv parsed them as numbers, dropping the
# leading zeros.
dat$GeoName <- gsub(" (Metropolitan Statistical Area)", "", dat$GeoName,
                    fixed = TRUE)
dat$GeoFips <- formatC(as.numeric(dat$GeoFips), width = 5, flag = "0",
                       format = "d")
head(dat)
## GeoFips GeoName X2013 ## 1 00999 United States (Nonmetropolitan Portion) 87.7 ## 2 10180 Abilene, TX 91.3 ## 3 10420 Akron, OH 88.9 ## 4 10500 Albany, GA 84.3 ## 5 10540 Albany, OR 93.5 ## 6 10580 Albany-Schenectady-Troy, NY 99.0
http://bea.gov/api/data/?UserID=xxxxxx-xxxx-xxxx-xxxx-xxxxxxxx& method=GetData&datasetname=RegionalData&KeyCode=RPPALL_MI& Year=2013&ResultFormat=json
library(jsonlite)
# jsonlite's parser is fromJSON() -- there is no readJSON(). It will
# fetch and parse a URL directly.
dat <- fromJSON("that horrible URL")
# Drill down to the records and apply the same tidying as the CSV
# version: numeric value column, suffix-free names, zero-padded FIPS.
dat <- dat$BEAAPI$Results$Data
dat$X2013 <- as.numeric(dat$DataValue)
dat$GeoName <- gsub(" \\(Metropolitan Statistical Area\\)", "",
                    dat$GeoName)
dat$GeoFips <- sprintf("%05d", as.numeric(dat$GeoFips))
head(dat[, c(1, 2, 8)])
## GeoFips GeoName X2013 ## 1 10180 Abilene, TX 91.3 ## 2 10420 Akron, OH 88.9 ## 3 10500 Albany, GA 84.3 ## 4 10540 Albany, OR 93.5 ## 5 10580 Albany-Schenectady-Troy, NY 99.0 ## 6 10740 Albuquerque, NM 97.1
library(httr)
# Build the BEA API request with a structured query list instead of
# hand-gluing a URL; httr handles the encoding. The token comes from the
# BEA_API_TOKEN environment variable so it never appears in source.
response <- GET("http://bea.gov/api/data/",
query=list(
UserID=Sys.getenv("BEA_API_TOKEN"),
method="GetData",
datasetname="RegionalData",
KeyCode="RPPALL_MI",
Year="2013",
ResultFormat="json"
))
# Parse the JSON body (as="text" avoids content-type guessing).
dat <- fromJSON(content(response, as="text"))
Same cleanup as we did in the raw URL version
# Same tidying as the raw-URL version: drill down to the records, make
# the value column numeric, strip the MSA suffix, zero-pad FIPS codes.
dat <- dat$BEAAPI$Results$Data
dat$X2013 <- as.numeric(dat$DataValue)
dat$GeoName <- gsub(" \\(Metropolitan Statistical Area\\)", "",
dat$GeoName)
dat$GeoFips <- sprintf("%05d", as.numeric(dat$GeoFips))
head(dat[,c(1,2,8)])
## GeoFips GeoName X2013 ## 1 10180 Abilene, TX 91.3 ## 2 10420 Akron, OH 88.9 ## 3 10500 Albany, GA 84.3 ## 4 10540 Albany, OR 93.5 ## 5 10580 Albany-Schenectady-Troy, NY 99.0 ## 6 10740 Albuquerque, NM 97.1
library(noncensus)
data(counties)
# Full county FIPS = state FIPS + county FIPS. Pair each county with its
# CBSA code so the RPP values (keyed by CBSA / GeoFips) can be joined
# down to the county level.
xlate <- data.frame(
  fipscounty = paste0(counties$state_fips, counties$county_fips),
  cbsa = counties$CBSA,
  stringsAsFactors = FALSE
)
dat <- merge(dat[, c(1, 2, 8)], xlate[, c("cbsa", "fipscounty")],
             by.x = "GeoFips", by.y = "cbsa")
head(dat)
## GeoFips GeoName X2013 fipscounty ## 1 10180 Abilene, TX 91.3 48441 ## 2 10180 Abilene, TX 91.3 48253 ## 3 10180 Abilene, TX 91.3 48059 ## 4 10420 Akron, OH 88.9 39133 ## 5 10420 Akron, OH 88.9 39153 ## 6 10500 Albany, GA 84.3 13177
Get map data
library(rgdal)
# NOTE(review): rgdal was retired from CRAN in 2023; sf::st_read() is
# the maintained replacement -- consider migrating.
URL <- "http://bl.ocks.org/mbostock/raw/4090846/us.json"
fil <- basename(URL)
# Cache the TopoJSON locally; only download on the first run.
if (!file.exists(fil)) download.file(URL, fil)
# read state borders from the file
states <- readOGR(fil, "states", stringsAsFactors=FALSE,
verbose=FALSE)
# read county borders from the file
county <- readOGR(fil, "counties", stringsAsFactors=FALSE,
verbose=FALSE)
We don't want to display all the counties, so we'll subtract out the ones that aren't in our data set.
# Keep only counties that appear in the RPP data, then attach the RPP
# values to the spatial object (all.x=TRUE keeps every remaining county
# even if a join value is missing).
rpp_counties <- subset(county, id %in% dat$fipscounty)
rpp_counties <- merge(rpp_counties, dat,
by.x="id",
by.y="fipscounty",
all.x=TRUE)
We need to set up the color scale (really similar to how you'd do it in JS)
library(leaflet)
# Bin the 2013 RPP values into 5 classes on the BrBG palette and
# pre-compute each county's fill colour.
pal <- colorBin("BrBG", range(rpp_counties$X2013), bins=5)
rpp_counties$color <- pal(rpp_counties$X2013)
# Choropleth: filled county polygons with HTML popups, thin state
# outlines on top, a dollar-formatted legend, centred on the US east
# coast at zoom 6.
# NOTE(review): htmlEscape() (htmltools) and dollar() (scales) are used
# below but not attached here -- confirm they are loaded upstream.
leaflet() %>%
addProviderTiles("Acetate.terrain") %>%
addPolygons(data=rpp_counties, weight=0.25,
fillColor=~color, color="black", fillOpacity=1,
popup=~sprintf("In %s, <span style='font-weight:700'>%s</span> has the purchasing power of $100.00.",
htmlEscape(GeoName),
htmlEscape(dollar(X2013)))) %>%
addPolygons(data=states, weight=0.5, fillColor="white", fillOpacity=0, fill=FALSE, color="#525252") %>%
addLegend(position="bottomright", pal=pal, values=rpp_counties$X2013, labFormat=labelFormat("$"), opacity = 1) %>%
setView(-74.0059, 40.7127, 6)
~170 lines pure leaflet/javascript
vs
~60 lines of R
…and the R version can be instantly used to get new BEA data sets where the leaflet one "cheated" and merged the data prior to the HTML example.
General
readr / rio (faster & more robust compatibility) · readxl (and others) for raw Excel reading · googlesheets · data.table (large data)
Web Scraping / API
httr (like curl command line, but better) · rvest (more structured web page scraping) · jsonlite (JSON) · XML / xml2 (XML) · RSelenium (headless browser & DOM scraping) · V8 (the V8 engine in R)
Database
dplyr · RPostgreSQL · RMySQL · rredis · mongolite · RSQLite
Database
dplyr · RPostgreSQL · RMySQL · rredis · mongolite · RSQLite
http://www.verizonenterprise.com/DBIR/2015/
200,000 incidents/breaches
~3,000 data elements per record
~150 lines of statistical analysis
OpenCPU
plumber http://plumber.trestletech.com/ (think "Flask") · httpuv https://github.com/rstudio/httpuv (basic web & websocket server) · Shiny http://shiny.rstudio.com/
htmlwidgets http://www.htmlwidgets.org/ (e.g. leaflet)
library(taucharts)
# taucharts htmlwidget demo: scatter of mpg against class/price,
# coloured by vehicle class, with a trend line and a legend.
data(cars_data)
tauchart(cars_data) %>%
tau_point("milespergallon", c("class", "price"), color="class") %>%
tau_trendline() %>% tau_legend()
devtools::create("/path/to/new/package") · setwd("/path/to/new/package") — or use RStudio
htmlwidgets::scaffoldWidget() · devtools::build() · devtools::install() — or use RStudio